-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[libc] Support %lc in printf #169983
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[libc] Support %lc in printf #169983
Conversation
|
@llvm/pr-subscribers-libc Author: Shubh Pachchigar (shubhe25p) ChangesFull diff: https://github.com/llvm/llvm-project/pull/169983.diff 4 Files Affected:
diff --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt
index 624129b2b36e7..f35a65de1f110 100644
--- a/libc/src/stdio/printf_core/CMakeLists.txt
+++ b/libc/src/stdio/printf_core/CMakeLists.txt
@@ -111,6 +111,7 @@ add_header_library(
.printf_config
.writer
libc.include.inttypes
+ libc.hdr.types.wchar_t
libc.src.__support.big_int
libc.src.__support.common
libc.src.__support.CPP.limits
@@ -123,6 +124,8 @@ add_header_library(
libc.src.__support.integer_to_string
libc.src.__support.libc_assert
libc.src.__support.uint128
+ libc.src.__support.wchar.mbstate
+ libc.src.__support.wchar.wcrtomb
libc.src.__support.StringUtil.error_to_string
libc.src.string.memory_utils.inline_memcpy
)
diff --git a/libc/src/stdio/printf_core/char_converter.h b/libc/src/stdio/printf_core/char_converter.h
index fd2eb2553887a..31dd34fe7a797 100644
--- a/libc/src/stdio/printf_core/char_converter.h
+++ b/libc/src/stdio/printf_core/char_converter.h
@@ -1,4 +1,4 @@
-//===-- String Converter for printf -----------------------------*- C++ -*-===//
+//===-- Character Converter for printf -----------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -9,7 +9,10 @@
#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CHAR_CONVERTER_H
#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CHAR_CONVERTER_H
+#include "hdr/types/wchar_t.h"
#include "src/__support/macros/config.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/__support/wchar/wcrtomb.h"
#include "src/stdio/printf_core/converter_utils.h"
#include "src/stdio/printf_core/core_structs.h"
#include "src/stdio/printf_core/writer.h"
@@ -20,8 +23,11 @@ namespace printf_core {
template <WriteMode write_mode>
LIBC_INLINE int convert_char(Writer<write_mode> *writer,
const FormatSection &to_conv) {
- char c = static_cast<char>(to_conv.conv_val_raw);
-
+ char c;
+ wchar_t wc;
+ char mb_str[MB_LEN_MAX];
+ internal::mbstate internal_mbstate = {0};
+ int ret = 0;
constexpr int STRING_LEN = 1;
size_t padding_spaces =
@@ -33,7 +39,21 @@ LIBC_INLINE int convert_char(Writer<write_mode> *writer,
RET_IF_RESULT_NEGATIVE(writer->write(' ', padding_spaces));
}
- RET_IF_RESULT_NEGATIVE(writer->write(c));
+ if (to_conv.length_modifier == LengthModifier::l) {
+ wc = static_cast<wchar_t>(to_conv.conv_val_raw);
+ ret = internal::wcrtomb(mb_str, wc, &internal_mbstate);
+ if (ret <= 0) {
+ return -1;
+ }
+
+ for (int i = 0; i < ret; i++) {
+ RET_IF_RESULT_NEGATIVE(writer->write(mb_str[i]));
+ }
+
+ } else {
+ c = static_cast<char>(to_conv.conv_val_raw);
+ RET_IF_RESULT_NEGATIVE(writer->write(c));
+ }
// If the padding is on the right side, write the spaces last.
if (padding_spaces > 0 &&
diff --git a/libc/test/src/stdio/printf_core/CMakeLists.txt b/libc/test/src/stdio/printf_core/CMakeLists.txt
index ff7ebbc4f5fd0..a4c919420777d 100644
--- a/libc/test/src/stdio/printf_core/CMakeLists.txt
+++ b/libc/test/src/stdio/printf_core/CMakeLists.txt
@@ -35,4 +35,5 @@ add_libc_unittest(
libc.src.stdio.printf_core.converter
libc.src.stdio.printf_core.writer
libc.src.stdio.printf_core.core_structs
+ libc.hdr.types.wchar_t
)
diff --git a/libc/test/src/stdio/printf_core/converter_test.cpp b/libc/test/src/stdio/printf_core/converter_test.cpp
index 2dae2a22c864c..026e36747d4df 100644
--- a/libc/test/src/stdio/printf_core/converter_test.cpp
+++ b/libc/test/src/stdio/printf_core/converter_test.cpp
@@ -9,7 +9,7 @@
#include "src/stdio/printf_core/converter.h"
#include "src/stdio/printf_core/core_structs.h"
#include "src/stdio/printf_core/writer.h"
-
+#include "hdr/types/wchar_t.h"
#include "test/UnitTest/Test.h"
class LlvmLibcPrintfConverterTest : public LIBC_NAMESPACE::testing::Test {
@@ -255,3 +255,56 @@ TEST_F(LlvmLibcPrintfConverterTest, OctConversion) {
ASSERT_STREQ(str, "1234");
ASSERT_EQ(writer.get_chars_written(), size_t{4});
}
+
+TEST_F(LlvmLibcPrintfConverterTest, WideCharConversion) {
+
+ LIBC_NAMESPACE::printf_core::FormatSection section;
+ section.has_conv = true;
+ section.raw_string = "%c";
+ section.conv_name = 'c';
+ section.length_modifier = LIBC_NAMESPACE::printf_core::LengthModifier::l;
+ section.conv_val_raw = static_cast<wchar_t>(L'S');
+
+ LIBC_NAMESPACE::printf_core::convert(&writer, section);
+
+ wb.buff[wb.buff_cur] = '\0';
+
+ ASSERT_STREQ(str, "S");
+ ASSERT_EQ(writer.get_chars_written(), size_t{1});
+}
+
+TEST_F(LlvmLibcPrintfConverterTest, WideCharConversionLeftJustified) {
+ LIBC_NAMESPACE::printf_core::FormatSection left_justified_conv;
+ left_justified_conv.has_conv = true;
+ left_justified_conv.raw_string = "%-4c";
+ left_justified_conv.conv_name = 'c';
+ left_justified_conv.length_modifier =
+ LIBC_NAMESPACE::printf_core::LengthModifier::l;
+ left_justified_conv.flags =
+ LIBC_NAMESPACE::printf_core::FormatFlags::LEFT_JUSTIFIED;
+ left_justified_conv.min_width = 4;
+ left_justified_conv.conv_val_raw = static_cast<wchar_t>(L'S');
+
+ LIBC_NAMESPACE::printf_core::convert(&writer, left_justified_conv);
+ wb.buff[wb.buff_cur] = '\0';
+
+ ASSERT_STREQ(str, "S ");
+ ASSERT_EQ(writer.get_chars_written(), size_t{4});
+}
+
+TEST_F(LlvmLibcPrintfConverterTest, WideCharConversionRightJustified) {
+ LIBC_NAMESPACE::printf_core::FormatSection right_justified_conv;
+ right_justified_conv.has_conv = true;
+ right_justified_conv.raw_string = "%4c";
+ right_justified_conv.conv_name = 'c';
+ right_justified_conv.length_modifier =
+ LIBC_NAMESPACE::printf_core::LengthModifier::l;
+ right_justified_conv.min_width = 4;
+ right_justified_conv.conv_val_raw = static_cast<wchar_t>(L'S');
+
+ LIBC_NAMESPACE::printf_core::convert(&writer, right_justified_conv);
+ wb.buff[wb.buff_cur] = '\0';
+
+ ASSERT_STREQ(str, " S");
+ ASSERT_EQ(writer.get_chars_written(), size_t{4});
+}
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
|
This PR has spun up three new interesting directions, each of which seems feasible with some guidance. I am not sure which one can be pulled into this PR and if it is even an issue. Let me know your thoughts @michaelrj-google:
|
38f014a to
c2a7797
Compare
|
|
||
| RET_IF_RESULT_NEGATIVE(writer->write(c)); | ||
| if (to_conv.length_modifier == LengthModifier::l) { | ||
| wc = static_cast<wchar_t>(static_cast<unsigned int>(to_conv.conv_val_raw)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The argument type would be wint_t per standard, so you might need to handle WEOF and then static_cast.
You mentioned BigInt problems in the review thread, could you elaborate how are these relevant?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks this makes sense, what I was referring to was that naively casting from conv_val_raw (BigInt) to wchar_t leads to an error in BigInt to(), as the casting has a guard clause that essentially checks:
struct BigInt {
...
template <typename T>
LIBC_INLINE constexpr cpp::enable_if_t<
cpp::is_integral_v<T> && !cpp::is_same_v<T, bool>, T>
to() const {
... }
is_integral_v will return false if the type is not one of:
char, signed char, unsigned char, short, unsigned short, int,
unsigned int, long, unsigned long, long long, unsigned long long, bool
That is the reason why I cast to unsigned int first. Let me know your thoughts
|
Thank you for starting to work on this!
See comment in CL - I think you need to roundtrip the input arguments through
Could you figure out why is our UTF-32-only wcrtomb gets pulled in by this change? I.e. is there a particular function that is enabled on Windows and depends on the printf functionality?
That sounds like a natural follow-up! |
c2a7797 to
7a33e23
Compare
🐧 Linux x64 Test Results✅ The build succeeded and no tests ran. This is expected in some build configurations. |
Thank you so much for the feedback, it is really helpful @vonosmas. I am not able to find this specific function, can you please help? I can add a macro to guard the wcrtomb in char_converter.h as suggested by @michaelrj-google but I am not sure. |
97751a8 to
cbf857a
Compare
Add support for %lc in printf by calling internal wcrtomb function and relevant end-to-end snprintf test. Additionally, printf parser can also recognize length modifier. Added a flag to disable wchar support on windows platform.
cbf857a to
552b263
Compare
Add %lc support to libc printf by utilizing wcrtomb internal function, also updated parser logic and added relevant end to end snprintf tests. Resolves #166598